Skip to content
Snippets Groups Projects
Commit 1ca6f0a2 authored by Hashim Sharif's avatar Hashim Sharif
Browse files

merging

parents 544becdb 994adbd8
No related branches found
No related tags found
No related merge requests found
Showing
with 3513 additions and 0 deletions
1114.3009809999999
+++++
conf1 1 1 84.76 0.0
1 gpu conv fp32 11 add fp32 1 tanh fp32 1
2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
3 gpu conv fp32 11 add fp32 1 tanh fp32 1
4 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
5 gpu conv fp32 11 add fp32 1 tanh fp32 1
6 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
7 gpu mul fp32 11 add fp32 1
8 gpu softmax fp32 1
-----
+++++
conf2 1.678391931801309 1.4393008204786808 84.76 0.0
1 gpu conv fp16 12 add fp16 1 tanh fp16 1
2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1
6 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
7 gpu mul fp16 12 add fp16 1
8 gpu softmax fp32 1
-----
2592.187221
+++++
conf1 1 1 78.78 0.0
1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
3 gpu conv fp32 11 add fp32 1 tanh fp32 1
4 gpu conv fp32 11 add fp32 1 tanh fp32 1
5 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
6 gpu mul fp32 11 add fp32 1
7 gpu softmax fp32 1
-----
+++++
conf2 1.7593976485873195 1.6193399031642917 78.78 0.0
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf3 2.081712090729918 1.9102226906341664 78.53999999999999 0.2400000000000091
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf4 2.081712090729918 1.9102226906341664 78.53999999999999 0.2400000000000091
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf5 2.2627828537139263 2.065683616898884 78.34 0.4399999999999977
1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf6 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf7 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf8 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf9 2.2247938983110425 2.060416584958474 77.98 0.7999999999999972
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf10 2.2247938983110425 2.060416584958474 77.98 0.7999999999999972
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf11 2.4370818494175888 2.250857540113024 77.98 0.7999999999999972
1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf12 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955
1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf13 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955
1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf14 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955
1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf15 2.228328207535687 2.0675123320068267 77.82 0.960000000000008
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf16 2.228328207535687 2.0675123320068267 77.82 0.960000000000008
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf17 2.3417491169395532 2.1355030360671465 77.78 1.0
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf18 2.3417491169395532 2.1355030360671465 77.78 1.0
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf19 2.3417491169395532 2.1355030360671465 77.78 1.0
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv fp16 12 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf20 2.5243776633638846 2.324968713897418 77.78 1.0
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf21 2.5243776633638846 2.324968713897418 77.78 1.0
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf22 2.5243776633638846 2.324968713897418 77.78 1.0
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf23 2.5371416718362823 2.3372173527293847 77.56 1.2199999999999989
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf24 2.5371416718362823 2.3372173527293847 77.56 1.2199999999999989
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf25 2.472472828611022 2.286262888143739 77.48 1.2999999999999972
1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf26 2.574475112841438 2.3637004022727544 77.4 1.3799999999999955
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf27 2.1200397577541747 1.951741010849448 77.3 1.480000000000004
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf28 2.1200397577541747 1.951741010849448 77.3 1.480000000000004
1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf29 2.5289288699015304 2.334007588396142 77.2 1.5799999999999983
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf30 2.5289288699015304 2.334007588396142 77.2 1.5799999999999983
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf31 2.5289288699015304 2.334007588396142 77.2 1.5799999999999983
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf32 2.541739061163583 2.3463519042470864 77.18 1.5999999999999943
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf33 2.541739061163583 2.3463519042470864 77.18 1.5999999999999943
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf34 2.580258965052788 2.3848508703934153 76.96 1.8200000000000074
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf35 2.580258965052788 2.3848508703934153 76.96 1.8200000000000074
1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf36 2.4768386387310675 2.295002745725082 76.94 1.8400000000000034
1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf37 2.5713008246729716 2.3684101116633007 76.94 1.8400000000000034
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf38 2.5713008246729716 2.3684101116633007 76.94 1.8400000000000034
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf39 2.5670585645212847 2.3720992406158463 76.92 1.8599999999999994
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf40 2.5670585645212847 2.3720992406158463 76.92 1.8599999999999994
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1
5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf41 2.5760229577267673 2.3777906009584133 76.9 1.8799999999999955
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
+++++
conf42 2.5760229577267673 2.3777906009584133 76.9 1.8799999999999955
1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1
3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1
4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1
5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1
7 gpu softmax fp32 1
-----
2739.950736
+++++
conf1 1 1 56.3 0.0
1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
3 gpu conv fp32 11 add fp32 1 relu fp32 1
4 gpu conv fp32 11 add fp32 1 relu fp32 1
5 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
6 gpu mul fp32 11 add fp32 1 relu fp32 1
7 gpu mul fp32 11 add fp32 1 relu fp32 1
8 gpu mul fp32 11 add fp32 1
9 gpu softmax fp32 1
-----
+++++
conf2 1.802133644103582 1.8186433204507424 55.76 0.5399999999999991
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv fp16 12 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf3 2.0227701930718065 2.043112495268932 55.42 0.8799999999999955
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv fp16 12 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf4 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf5 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf6 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf7 2.085011755614172 2.122606306624671 54.92 1.3799999999999955
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv fp16 12 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf8 2.085011755614172 2.122606306624671 54.92 1.3799999999999955
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv fp16 12 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf9 1.8052659214923805 1.8217111622759978 54.82 1.4799999999999969
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf10 2.0146435217865446 2.0367475358800102 54.58 1.7199999999999989
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv fp16 12 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf11 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 157 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf12 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 157 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf13 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 157 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf14 2.019868378233057 2.0433540129730265 54.17999999999999 2.1200000000000045
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf15 2.019868378233057 2.0433540129730265 54.17999999999999 2.1200000000000045
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf16 2.028037341700216 2.049760395549724 53.98 2.3200000000000003
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf17 2.028037341700216 2.049760395549724 53.98 2.3200000000000003
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf18 2.028037341700216 2.049760395549724 53.98 2.3200000000000003
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf19 1.8052659214923805 1.8217111622759978 53.879999999999995 2.4200000000000017
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 11 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf20 1.8052659214923805 1.8217111622759978 53.879999999999995 2.4200000000000017
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 11 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf21 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf22 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf23 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
+++++
conf24 2.0185588815268836 2.0405961127674277 53.559999999999995 2.740000000000002
1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1
5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1
6 gpu mul fp16 12 add fp16 1 relu fp16 1
7 gpu mul fp16 12 add fp16 1 relu fp16 1
8 gpu mul fp16 12 add fp16 1
9 gpu softmax fp32 1
-----
282.5141369999999
+++++
conf1 1 1 98.7 0.0
1 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1
2 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1
3 gpu mul fp32 11 add fp32 1 tanh fp32 1
4 gpu mul fp32 11 add fp32 1 tanh fp32 1
5 gpu softmax fp32 1
-----
+++++
conf2 1.9343699741206566 2.1183040240042 98.68 0.01999999999999602
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf3 1.9343699741206566 2.1183040240042 98.68 0.01999999999999602
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf4 1.8936889628815377 2.139779619692146 98.68 0.01999999999999602
1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf5 1.8936889628815377 2.139779619692146 98.68 0.01999999999999602
1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf6 1.6415764141643088 1.8012120076077847 98.66 0.04000000000000625
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf7 1.9358279784215788 2.1233340385374495 98.66 0.04000000000000625
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf8 1.9358279784215788 2.1233340385374495 98.66 0.04000000000000625
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf9 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274
1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf10 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274
1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf11 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274
1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf12 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf13 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf14 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125
1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf15 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125
1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf16 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125
1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf17 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf18 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf19 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf20 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf21 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf22 1.9040998718547615 2.1501783570812565 98.61999999999999 0.0800000000000125
1 gpu conv perf_fp16 151 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf23 1.9040998718547615 2.1501783570812565 98.61999999999999 0.0800000000000125
1 gpu conv perf_fp16 151 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf24 1.5630416487818 1.7451546885860074 98.6 0.10000000000000853
1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf25 1.5630416487818 1.7451546885860074 98.6 0.10000000000000853
1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf26 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf27 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf28 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf29 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf30 2.1941568976363475 2.4445764373737644 98.6 0.10000000000000853
1 gpu conv samp_fp16 269 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf31 2.1941568976363475 2.4445764373737644 98.6 0.10000000000000853
1 gpu conv samp_fp16 269 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf32 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf33 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf34 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf35 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf36 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf37 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455
1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf38 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf39 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf40 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455
1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf41 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455
1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf42 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057
1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf43 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057
1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf44 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057
1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf45 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057
1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf46 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057
1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf47 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057
1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf48 1.6319327047042609 1.8046853367113418 98.54 0.1599999999999966
1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf49 1.6350106933897723 1.8435952834193967 98.52 0.18000000000000682
1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf50 1.6350106933897723 1.8435952834193967 98.52 0.18000000000000682
1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
+++++
conf51 1.6510114896409525 1.8591762752048948 98.48 0.21999999999999886
1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1
2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1
3 gpu mul fp16 12 add fp16 1 tanh fp16 1
4 gpu mul fp16 12 add fp16 1 tanh fp16 1
5 gpu softmax fp32 1
-----
2484.981244
+++++
conf1 1 1 89.42 0.0
1 gpu conv fp32 11 add fp32 1 relu fp32 1
2 gpu conv fp32 11 add fp32 1 relu fp32 1
3 gpu conv fp32 11 add fp32 1
4 gpu add fp32 11
5 gpu relu fp32 11
6 gpu conv fp32 11 add fp32 1 relu fp32 1
7 gpu conv fp32 11 add fp32 1
8 gpu add fp32 11
9 gpu relu fp32 11
10 gpu conv fp32 11 add fp32 1 relu fp32 1
11 gpu conv fp32 11 add fp32 1
12 gpu add fp32 11
13 gpu relu fp32 11
14 gpu conv fp32 11 add fp32 1 relu fp32 1
15 gpu conv fp32 11 add fp32 1
16 gpu conv fp32 11 add fp32 1
17 gpu add fp32 11
18 gpu relu fp32 11
19 gpu conv fp32 11 add fp32 1 relu fp32 1
20 gpu conv fp32 11 add fp32 1
21 gpu add fp32 11
22 gpu relu fp32 11
23 gpu conv fp32 11 add fp32 1 relu fp32 1
24 gpu conv fp32 11 add fp32 1
25 gpu add fp32 11
26 gpu relu fp32 11
27 gpu conv fp32 11 add fp32 1 relu fp32 1
28 gpu conv fp32 11 add fp32 1
29 gpu conv fp32 11 add fp32 1
30 gpu add fp32 11
31 gpu relu fp32 11
32 gpu conv fp32 11 add fp32 1 relu fp32 1
33 gpu conv fp32 11 add fp32 1
34 gpu add fp32 11
35 gpu relu fp32 11
36 gpu conv fp32 11 add fp32 1 relu fp32 1
37 gpu conv fp32 11 add fp32 1
38 gpu add fp32 11
39 gpu relu fp32 11
40 gpu pool_mean fp32 11
41 gpu mul fp32 11 add fp32 1
42 gpu softmax fp32 1
-----
+++++
conf2 1.3617910209460897 1.3866827244386561 89.42 0.0
1 gpu conv fp16 12 add fp16 1 relu fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1
3 gpu conv fp16 12 add fp16 1
4 gpu add fp16 12
5 gpu relu fp16 12
6 gpu conv fp16 12 add fp16 1 relu fp16 1
7 gpu conv fp16 12 add fp16 1
8 gpu add fp16 12
9 gpu relu fp16 12
10 gpu conv fp16 12 add fp16 1 relu fp16 1
11 gpu conv fp16 12 add fp16 1
12 gpu add fp16 12
13 gpu relu fp16 12
14 gpu conv fp16 12 add fp16 1 relu fp16 1
15 gpu conv fp16 12 add fp16 1
16 gpu conv fp16 12 add fp16 1
17 gpu add fp16 12
18 gpu relu fp16 12
19 gpu conv fp16 12 add fp16 1 relu fp16 1
20 gpu conv fp16 12 add fp16 1
21 gpu add fp16 12
22 gpu relu fp16 12
23 gpu conv fp16 12 add fp16 1 relu fp16 1
24 gpu conv fp16 12 add fp16 1
25 gpu add fp16 12
26 gpu relu fp16 12
27 gpu conv fp16 12 add fp16 1 relu fp16 1
28 gpu conv fp16 12 add fp16 1
29 gpu conv fp16 12 add fp16 1
30 gpu add fp16 12
31 gpu relu fp16 12
32 gpu conv fp16 12 add fp16 1 relu fp16 1
33 gpu conv fp16 12 add fp16 1
34 gpu add fp16 12
35 gpu relu fp16 12
36 gpu conv fp16 12 add fp16 1 relu fp16 1
37 gpu conv fp16 12 add fp16 1
38 gpu add fp16 12
39 gpu relu fp16 12
40 gpu pool_mean fp16 12
41 gpu mul fp16 12 add fp16 1
42 gpu softmax fp32 1
-----
3776.508929999999
+++++
conf1 1 1 89.96 0.0
1 gpu conv fp32 11 add fp32 1 relu fp32 1
2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
3 gpu conv fp32 11 add fp32 1 relu fp32 1
4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
5 gpu conv fp32 11 add fp32 1 relu fp32 1
6 gpu conv fp32 11 add fp32 1 relu fp32 1
7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
8 gpu conv fp32 11 add fp32 1 relu fp32 1
9 gpu conv fp32 11 add fp32 1 relu fp32 1
10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
11 gpu conv fp32 11 add fp32 1 relu fp32 1
12 gpu conv fp32 11 add fp32 1 relu fp32 1
13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
14 gpu mul fp32 11 add fp32 1 relu fp32 1
15 gpu mul fp32 11 add fp32 1
16 gpu softmax fp32 1
-----
+++++
conf2 2.4192803184847484 2.2393153800931898 89.22 0.7399999999999949
1 gpu conv fp16 12 add fp16 1 relu fp16 1
2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1
5 gpu conv fp16 12 add fp16 1 relu fp16 1
6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1
8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1
11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1
12 gpu conv perf_fp16 152 add fp16 1 relu fp16 1
13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1
14 gpu mul fp16 12 add fp16 1 relu fp16 1
15 gpu mul fp16 12 add fp16 1
16 gpu softmax fp32 1
-----
+++++
conf3 2.1240075032467187 1.9749367321301132 88.64 1.3199999999999932
1 gpu conv fp16 11 add fp16 1 relu fp16 1
2 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
5 gpu conv fp16 12 add fp16 1 relu fp16 1
6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1
8 gpu conv perf_fp16 152 add fp16 1 relu fp16 1
9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1
10 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1
11 gpu conv perf_fp16 151 add fp16 1 relu fp16 1
12 gpu conv perf_fp16 151 add fp16 1 relu fp16 1
13 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1
14 gpu mul fp16 12 add fp16 1 relu fp16 1
15 gpu mul fp16 12 add fp16 1
16 gpu softmax fp32 1
-----
3768.819777999999
+++++
conf1 1 1 66.5 0.0
1 gpu conv fp32 11 add fp32 1 relu fp32 1
2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
3 gpu conv fp32 11 add fp32 1 relu fp32 1
4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
5 gpu conv fp32 11 add fp32 1 relu fp32 1
6 gpu conv fp32 11 add fp32 1 relu fp32 1
7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
8 gpu conv fp32 11 add fp32 1 relu fp32 1
9 gpu conv fp32 11 add fp32 1 relu fp32 1
10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
11 gpu conv fp32 11 add fp32 1 relu fp32 1
12 gpu conv fp32 11 add fp32 1 relu fp32 1
13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
14 gpu mul fp32 11 add fp32 1 relu fp32 1
15 gpu mul fp32 11 add fp32 1
16 gpu softmax fp32 1
-----
+++++
conf2 2.2793321208062913 2.0502797911533945 66.42 0.0799999999999983
1 gpu conv fp16 12 add fp16 1 relu fp16 1
2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1
5 gpu conv fp16 12 add fp16 1 relu fp16 1
6 gpu conv samp_fp16 269 add fp16 1 relu fp16 1
7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1
8 gpu conv fp16 12 add fp16 1 relu fp16 1
9 gpu conv samp_fp16 267 add fp16 1 relu fp16 1
10 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1
11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
12 gpu conv fp16 11 add fp16 1 relu fp16 1
13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1
14 gpu mul fp16 12 add fp16 1 relu fp16 1
15 gpu mul fp16 12 add fp16 1
16 gpu softmax fp32 1
-----
+++++
conf3 2.2793321208062913 2.0502797911533945 66.42 0.0799999999999983
1 gpu conv fp16 12 add fp16 1 relu fp16 1
2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1
5 gpu conv fp16 12 add fp16 1 relu fp16 1
6 gpu conv samp_fp16 269 add fp16 1 relu fp16 1
7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1
8 gpu conv fp16 12 add fp16 1 relu fp16 1
9 gpu conv samp_fp16 267 add fp16 1 relu fp16 1
10 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1
11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
12 gpu conv fp16 11 add fp16 1 relu fp16 1
13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1
14 gpu mul fp16 12 add fp16 1 relu fp16 1
15 gpu mul fp16 12 add fp16 1
16 gpu softmax fp32 1
-----
+++++
conf4 2.664296720624579 2.427276363573644 64.7 1.7999999999999972
1 gpu conv fp16 12 add fp16 1 relu fp16 1
2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1
5 gpu conv fp16 12 add fp16 1 relu fp16 1
6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
7 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1
8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1
9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1
10 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1
11 gpu conv perf_fp16 151 add fp16 1 relu fp16 1
12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1
13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1
14 gpu mul fp16 12 add fp16 1 relu fp16 1
15 gpu mul fp16 12 add fp16 1
16 gpu softmax fp32 1
-----
19194.623482
+++++
conf1 1 1 72.84 0.0
1 gpu conv fp32 11 add fp32 1 relu fp32 1
2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
3 gpu conv fp32 11 add fp32 1 relu fp32 1
4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
5 gpu conv fp32 11 add fp32 1 relu fp32 1
6 gpu conv fp32 11 add fp32 1 relu fp32 1
7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
8 gpu conv fp32 11 add fp32 1 relu fp32 1
9 gpu conv fp32 11 add fp32 1 relu fp32 1
10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
11 gpu conv fp32 11 add fp32 1 relu fp32 1
12 gpu conv fp32 11 add fp32 1 relu fp32 1
13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
14 gpu mul fp32 11 add fp32 1 relu fp32 1
15 gpu mul fp32 11 add fp32 1 relu fp32 1
16 gpu mul fp32 11 add fp32 1
17 gpu softmax fp32 1
-----
+++++
conf2 1.7719381411481732 1.5850925672384186 72.84 0.0
1 gpu conv fp16 12 add fp16 1 relu fp16 1
2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 12 add fp16 1 relu fp16 1
4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
5 gpu conv fp16 12 add fp16 1 relu fp16 1
6 gpu conv fp16 12 add fp16 1 relu fp16 1
7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
8 gpu conv fp16 12 add fp16 1 relu fp16 1
9 gpu conv fp16 12 add fp16 1 relu fp16 1
10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
11 gpu conv fp16 12 add fp16 1 relu fp16 1
12 gpu conv fp16 12 add fp16 1 relu fp16 1
13 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1
14 gpu mul fp16 12 add fp16 1 relu fp16 1
15 gpu mul fp16 12 add fp16 1 relu fp16 1
16 gpu mul fp16 12 add fp16 1
17 gpu softmax fp32 1
-----
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment