diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt
index 32a9642d38ab816246b9e5cca01c6efcec3a2d8d..b8224da41c52f093bd61a23bae05eb09f39148a0 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt
@@ -9,400 +9,400 @@ conf1 1 1 98.7 0.0
 -----
 +++++
 conf2 1.828613181003043 2.071721708828981 98.65 0.04999999999999716
-1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 156 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 261 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf3 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716
-1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 152 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf4 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716
-1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 152 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf5 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716
-1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 152 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf6 1.8247639611533713 2.0227145446958756 98.64 0.060000000000002274
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 261 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf7 1.8247639611533713 2.0227145446958756 98.64 0.060000000000002274
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 261 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf8 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf9 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf10 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf11 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274
-1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 155 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf12 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274
-1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 155 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf13 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274
-1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 155 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf14 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125
-1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf15 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125
-1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf16 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125
-1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf17 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125
-1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf18 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125
-1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf19 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125
-1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf20 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
-1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 12 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf21 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
-1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 12 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf22 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
-1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 12 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf23 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf24 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf25 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf26 2.200653361151419 2.425091789360736 98.6 0.10000000000000853
-1 gpu conv samp_fp16 266 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv samp_fp16 266 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf27 2.200653361151419 2.425091789360736 98.6 0.10000000000000853
-1 gpu conv samp_fp16 266 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv samp_fp16 266 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf28 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf29 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf30 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455
-1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv fp16 11 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf31 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455
-1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 156 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf32 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455
-1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 156 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf33 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455
-1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 156 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf34 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455
-1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 158 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf35 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455
-1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 158 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf36 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455
-1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 158 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf37 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057
-1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 168 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf38 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057
-1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 168 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf39 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057
-1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 168 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf40 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057
-1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 157 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf41 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057
-1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 157 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf42 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057
-1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 157 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf43 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057
-1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 153 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf44 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057
-1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 153 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf45 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057
-1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 153 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf46 1.8698191484268973 2.13979218727595 98.54 0.1599999999999966
-1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 159 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf47 1.8698191484268973 2.13979218727595 98.54 0.1599999999999966
-1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 159 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf48 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682
-1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 165 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf49 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682
-1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 165 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf50 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682
-1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 165 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1
 -----
 +++++
 conf51 1.8534621507951072 2.1231113105788597 98.44000000000001 0.2599999999999909
-1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1
-2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1
+1 gpu conv perf_fp16 159 add fp16 1 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 261 add fp16 1 tanh fp16 12 pool_max fp16 12
 3 gpu mul fp16 12 add fp16 1 tanh fp16 1
 4 gpu mul fp16 12 add fp16 1 tanh fp16 1
 5 gpu softmax fp32 1