From 07b1f2643bd98cebc78b90dfa33bd4d195776a86 Mon Sep 17 00:00:00 2001
From: Elizabeth <hashim.sharif91@gmail.com>
Date: Sun, 17 Nov 2019 17:50:21 -0600
Subject: [PATCH] Fixed tanh/poolmax order for fp32 baseline

---
 .../tuner_pareto_confs_batch220.txt           | 682 +++++++++---------
 1 file changed, 345 insertions(+), 337 deletions(-)

diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt
index 2e3185632c..707fd70be0 100644
--- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt
@@ -1,896 +1,904 @@
 +++++
+conf1 1 0 99.69 0
+1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
+2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
+3 gpu mul fp32 1 add fp32 1 tanh fp32 1 
+4 gpu mul fp32 1 add fp32 1 tanh fp32 1 
+5 gpu softmax fp32 1
+-----
++++++
 conf1 2.01610051566 0 99.400002 0.6899979999999971
-1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 
-2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 
-3 gpu mul fp16 1 add fp32 1 tanh fp32 1 
-4 gpu mul fp16 1 add fp32 1 tanh fp32 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
+4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 5 gpu softmax fp32 1
 -----
 +++++
 conf2 2.01610051566 0 99.040001 0.974998499999991
-1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf3 2.00016617632 0 99.68 0.4099999999999909
-1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf4 2.00016617632 0 99.660004 0.42999599999999705
-1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf5 1.97610564729 0 99.599998 0.4900019999999984
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf6 2.00016617632 0 99.599998 0.4900019999999984
-1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf7 2.00016617632 0 99.080002 0.9149970000000067
-1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf8 2.00016617632 0 99.239998 0.6750029999999967
-1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf9 2.00016617632 0 99.199997 0.7350045000000023
-1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf10 1.99590274244 0 99.099998 0.8850029999999975
-1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf11 2.01610051566 0 99.559998 0.5300020000000046
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf12 1.99590274244 0 99.540001 0.549998999999994
-1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf13 2.00016617632 0 99.639999 0.45000099999999466
-1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf14 1.99590274244 0 99.580002 0.5099980000000045
-1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf15 2.01610051566 0 99.099998 0.8850029999999975
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf16 2.01610051566 0 99.160004 0.7949939999999955
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf17 2.00016617632 0 99.379997 0.46500449999999205
-1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf18 1.99590274244 0 99.639999 0.45000099999999466
-1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf19 2.01610051566 0 99.580002 0.5099980000000045
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf20 1.97610564729 0 99.660004 0.42999599999999705
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf21 1.99590274244 0 99.440002 0.6499979999999909
-1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf22 1.99590274244 0 99.260002 0.6449969999999965
-1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf23 2.00016617632 0 99.360001 0.49499850000000123
-1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf24 2.01610051566 0 99.32 0.5550000000000068
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf25 2.00016617632 0 99.519997 0.5700029999999942
-1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf26 1.97610564729 0 99.379997 0.46500449999999205
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf27 2.01610051566 0 99.68 0.4099999999999909
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf28 2.00016617632 0 99.559998 0.5300020000000046
-1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf29 2.00016617632 0 99.080002 0.9149970000000067
-1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf30 1.97610564729 0 99.660004 0.42999599999999705
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf31 2.01610051566 0 99.599998 0.4900019999999984
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf32 1.97610564729 0 99.080002 0.9149970000000067
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf33 2.01610051566 0 99.620003 0.4699970000000008
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf34 2.00016617632 0 99.620003 0.4699970000000008
-1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf35 2.00016617632 0 99.599998 0.4900019999999984
-1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf36 1.99590274244 0 99.599998 0.4900019999999984
-1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf37 2.01610051566 0 99.540001 0.549998999999994
-1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf38 2.00016617632 0 99.339996 0.5250059999999976
-1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf39 2.00016617632 0 99.599998 0.4900019999999984
-1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf40 1.97610564729 0 99.379997 0.46500449999999205
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf41 2.00016617632 0 99.559998 0.5300020000000046
-1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf42 1.99590274244 0 99.459999 0.6300010000000015
-1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf43 1.99590274244 0 99.400002 0.6899979999999971
-1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf44 2.00016617632 0 99.599998 0.4900019999999984
-1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf45 2.01610051566 0 99.599998 0.4900019999999984
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf46 2.01610051566 0 99.080002 0.9149970000000067
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf47 2.01610051566 0 99.660004 0.42999599999999705
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf48 2.00016617632 0 99.639999 0.45000099999999466
-1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf49 2.01610051566 0 99.480003 0.6099970000000013
-1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf50 2.00016617632 0 98.400002 1.9349969999999956
-1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf51 2.01610051566 0 98.540001 1.724998499999991
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf52 2.01610051566 0 99.080002 0.9149970000000067
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf53 2.00016617632 0 99.660004 0.42999599999999705
-1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf54 2.01610051566 0 99.660004 0.42999599999999705
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf55 1.97610564729 0 99.599998 0.4900019999999984
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf56 2.01610051566 0 98.900002 1.1849969999999956
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf57 1.99590274244 0 99.099998 0.8850029999999975
-1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf58 2.01610051566 0 99.580002 0.5099980000000045
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf59 1.97610564729 0 99.080002 0.9149970000000067
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf60 2.01610051566 0 98.959999 1.0950015000000022
-1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf61 2.01610051566 0 99.220001 0.7049985000000021
-1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf62 2.01610051566 0 98.839996 1.2750059999999976
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf63 1.99590274244 0 98.940002 1.1249969999999863
-1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf64 1.97610564729 0 99.379997 0.46500449999999205
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf65 2.00016617632 0 99.559998 0.5300020000000046
-1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf66 2.00016617632 0 99.239998 0.6750029999999967
-1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf67 2.01610051566 0 99.459999 0.6300010000000015
-1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf68 2.00016617632 0 99.360001 0.49499850000000123
-1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf69 2.01610051566 0 99.559998 0.5300020000000046
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf70 1.99590274244 0 99.440002 0.6499979999999909
-1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf71 2.00016617632 0 99.339996 0.5250059999999976
-1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf72 2.01610051566 0 99.32 0.5550000000000068
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf73 1.97610564729 0 99.379997 0.46500449999999205
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf74 2.00016617632 0 99.019997 1.0050044999999912
-1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf75 1.99590274244 0 99.260002 0.6449969999999965
-1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf76 2.01610051566 0 99.099998 0.8850029999999975
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf77 1.97610564729 0 98.440002 1.8749969999999863
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf78 2.01610051566 0 98.440002 1.8749969999999863
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf79 2.01610051566 0 99.160004 0.7949939999999955
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf80 1.97610564729 0 98.480003 1.814995500000002
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf81 2.00016617632 0 99.360001 0.49499850000000123
-1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf82 1.97610564729 0 99.660004 0.42999599999999705
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf83 1.99590274244 0 99.540001 0.549998999999994
-1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf84 2.00016617632 0 99.199997 0.7350045000000023
-1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf85 1.97610564729 0 98.440002 1.8749969999999863
-1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf86 2.00016617632 0 99.0 1.0349999999999966
-1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf87 1.99590274244 0 98.519997 1.7550044999999912
-1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf88 2.01610051566 0 99.400002 0.6899979999999971
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf89 2.01610051566 0 97.760002 2.8949969999999965
-1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf90 2.01610051566 0 99.519997 0.5700029999999942
-1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf91 2.01610051566 0 99.32 0.5550000000000068
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf92 2.01610051566 0 99.580002 0.5099980000000045
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf93 2.01610051566 0 99.480003 0.6099970000000013
-1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf94 2.01610051566 0 98.480003 1.814995500000002
-1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf95 2.01610051566 0 98.540001 1.724998499999991
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf96 2.01610051566 0 97.82 2.805000000000007
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf97 2.01610051566 0 98.959999 1.0950015000000022
-1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf98 2.01610051566 0 98.459999 1.8450015000000022
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf99 2.01610051566 0 99.660004 0.42999599999999705
-1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf100 2.01610051566 0 99.620003 0.4699970000000008
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf101 2.01610051566 0 97.699997 2.9850045000000023
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf102 2.01610051566 0 99.040001 0.974998499999991
-1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf103 2.01610051566 0 98.0 2.5349999999999966
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf104 2.01610051566 0 99.160004 0.7949939999999955
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf105 2.01610051566 0 99.540001 0.549998999999994
-1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf106 2.01610051566 0 99.519997 0.5700029999999942
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf107 2.01610051566 0 99.099998 0.8850029999999975
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf108 2.01610051566 0 98.120003 2.354995500000001
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf109 2.01610051566 0 99.459999 0.6300010000000015
-1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf110 2.01610051566 0 99.68 0.4099999999999909
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf111 2.01610051566 0 98.839996 1.2750059999999976
-1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
 +++++
 conf112 2.01610051566 0 98.18 2.2649999999999864
-1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 
+1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 
+2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1
+5 gpu softmax fp32 1
 -----
-- 
GitLab