Skip to content
Snippets Groups Projects
Commit 9838969c authored by Yifan Zhao's avatar Yifan Zhao
Browse files

Added quant range and default config for promise-targeted benchmarks

parent 70eca485
No related branches found
No related tags found
No related merge requests found
Showing
with 755 additions and 0 deletions
1 -1.88164262419 2.09340954985 -0.33087718 0.3323643 -0.7782218 0.6020472 -0.978641152382 0.998945295811
2 -0.978641152382 0.998945295811 -0.2095158 0.33543423 -0.45020863 0.30596754 -0.999703943729 0.999930202961
3 -0.999703943729 0.999930202961 -0.1715614 0.17037082 -0.6519161 0.5939945 -0.999933600426 0.999940037727
4 -0.999933600426 0.999940037727 -0.15575546 0.14456555 -0.55873865 0.4704539 -0.99999910593 0.999999344349
5 -0.99999910593 0.999999344349 -0.16108225 0.16864482 -0.22135437 0.10401678 -0.999434411526 0.999634206295
6 -0.999434411526 0.999634206295 -0.18183032 0.19018902 -0.07189204 0.106005594 -15.0765653801 19.4225852203
7 0 0 0 0 0 0 0 0
2000
+++++
conf1 3.86 0 79.1 0.0
1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
3 gpu conv fp32 1 add fp32 1 tanh fp32 1
4 gpu conv fp32 1 add fp32 1 tanh fp32 1
5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
6 gpu mul fp32 1 add fp32 1
7 gpu softmax fp32 1
-----
1 -1.8816435 2.0934134 -0.5421946 0.3710851 -0.06697306 0.040868897 -0.775027394891 0.779944300652
2 -0.775027394891 0.779944300652 -0.42474225 0.31460348 -0.3557253 -0.17281663 -0.808667064309 0.983953297734
3 -0.808667064309 0.983953297734 -0.44134507 0.79587924 -0.80424446 0.75330096 -0.995678424835 0.998566448689
4 -0.995678424835 0.998566448689 -0.2883836 0.31025785 -0.6353164 0.29015934 -0.993219196796 0.992379009724
5 -0.993219196796 0.992379009724 -0.2792431 0.37689754 -1.1379756 1.2391574 -0.999901354313 0.999910891056
6 -0.999901354313 0.999910891056 -0.27078503 0.27942517 -0.503003 0.12762362 -0.991036117375 0.971404970288
7 -0.991036117375 0.971404970288 -0.24273404 0.5845544 -0.53745 0.558251 -119.27973732 -25.2262819576
8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2000
+++++
conf1 2.64294896823 0 84.24999995 -0.05999995000000524
1 gpu conv fp32 1 add fp32 1 tanh fp32 1
2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
3 gpu conv fp32 1 add fp32 1 tanh fp32 1
4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
5 gpu conv fp32 1 add fp32 1 tanh fp32 1
6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
7 gpu mul fp32 1 add fp32 1
8 gpu softmax fp32 1
-----
1 0.0 255.0 0.5811487324237921 -0.5503702693581581 1.648145 -2.802485 0.0 1572.3096923828125
2 0.0 1572.3096923828125 0.26272463005783797 -0.2867645202279091 0.501206 -0.47985682 0.0 3183.7813264160477
3 0.0 3183.7813264160477 0.15785247704386754 -0.16606662392616273 0.5545839 -0.42038992 0.0 1765.4451872558668
4 0.0 1765.4451872558668 0.11035470351576919 -0.10464580833911895 0.9042998 -1.4275751 0.0 1345.5418548586083
5 0.0 1345.5418548586083 0.10250756608694818 -0.09240880391001702 2.4040315 -0.45662758 0.0 1227.3563232421875
6 0.0 1227.3563232421875 0.02963459612801672 -0.030517672039568428 0.09377053 -0.07124679 0.0 1034.5966391601676
7 0.0 1034.5966391601676 0.039147199764847845 -0.038392101023346184 0.1841282 -0.050027702 0.0 839.0697069702154
8 0.0 839.0697069702154 0.08549865524470925 -0.05494491942599416 0.15416704 -0.16314922 -608.3993963623047 1082.8444653320819
9 0 0 0 0 0 0 0 0
750.80768325
+++++
conf1 1.0 0 79.1 0.0
1 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
2 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
3 gpu conv fp32 1 add fp32 1 relu fp32 1
4 gpu conv fp32 1 add fp32 1 relu fp32 1
5 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
6 gpu mul fp32 1 add fp32 1 relu fp32 1
7 gpu mul fp32 1 add fp32 1 relu fp32 1
8 gpu mul fp32 1 add fp32 1
9 gpu softmax fp32 1
-----
1 0 1 -1 1 -1 1 -1 1
2 -1 1 -1 1 -1 1 -1 1
3 -1 1 -1 1 -1 1 -1 1
4 -1 1 -1 1 -1 1 -1 1
2000
+++++
conf1 1 0 99.69 0
1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1
3 gpu mul fp32 1 add fp32 1 tanh fp32 1
4 gpu mul fp32 1 add fp32 1 tanh fp32 1
5 gpu softmax fp32 1
-----
1 -1.9892114 2.126797 -2.19630692005 1.34758170414 0.0 0.0 -60.892750473 51.9925691605
2 0.0 5.71354155397 -0.931772116065 1.07742589378 0.0 0.0 -6.51858950329 6.81084251881
3 0.0 4.93213940287 -0.531654466152 0.57537904036 0.0 0.0 -4.48263123512 3.96730119753
4 0.0 4.10326339769 -0.362340988219 0.407691390038 0.0 0.0 -4.04261828327 3.8867793293
5 0.0 5.38322130251 -0.313120054901 0.293576799393 0.0 0.0 -5.92146921539 4.33867932415
6 0.0 4.31673815441 -0.232992478013 0.258029025793 0.0 0.0 -4.20778994751 3.93243697071
7 0.0 5.8304081068 -0.202337772191 0.189983081758 0.0 0.0 -6.29828691578 4.84813511753
8 0.0 4.44641780996 -0.174427356511 0.176958308667 0.0 0.0 -4.34791088581 3.61443646955
9 0.0 4.5180956049 -0.145467961878 0.15256431669 0.0 0.0 -3.02877027559 2.94873657799
10 0.0 6.34857563496 -0.130258745223 0.135582433432 0.0 0.0 -4.22931008053 3.53150463724
11 0.0 5.22100311041 -0.119001727596 0.125363747835 0.0 0.0 -4.03820378017 4.00400940704
12 0.0 5.73249834776 -0.108397216856 0.116256686077 0.0 0.0 -3.31110151148 4.46293323326
13 0.0 7.24049821186 -0.0862374496162 0.0885944995135 0.0 0.0 -4.17543139458 6.2043294754
14 0.0 7.81395883465 -0.0681302513927 0.0700202777982 0.0 0.0 -10.9205664234 2.64429125786
15 0.0 2.86920666504 -0.223010196954 0.14426593782 -0.1654396 0.23336112 -12.2459499588 23.8053251343
1000
+++++
conf1 1 0 84.8 0
1 gpu conv fp32 1
2 gpu batchnorm fp32 1
3 gpu relu fp32 1
4 gpu group_conv fp32 1
5 gpu batchnorm fp32 1
6 gpu relu fp32 1
7 gpu conv fp32 1
8 gpu batchnorm fp32 1
9 gpu relu fp32 1
10 gpu group_conv fp32 1
11 gpu batchnorm fp32 1
12 gpu relu fp32 1
13 gpu conv fp32 1
14 gpu batchnorm fp32 1
15 gpu relu fp32 1
16 gpu group_conv fp32 1
17 gpu batchnorm fp32 1
18 gpu relu fp32 1
19 gpu conv fp32 1
20 gpu batchnorm fp32 1
21 gpu relu fp32 1
22 gpu group_conv fp32 1
23 gpu batchnorm fp32 1
24 gpu relu fp32 1
25 gpu conv fp32 1
26 gpu batchnorm fp32 1
27 gpu relu fp32 1
28 gpu group_conv fp32 1
29 gpu batchnorm fp32 1
30 gpu relu fp32 1
31 gpu conv fp32 1
32 gpu batchnorm fp32 1
33 gpu relu fp32 1
34 gpu group_conv fp32 1
35 gpu batchnorm fp32 1
36 gpu relu fp32 1
37 gpu conv fp32 1
38 gpu batchnorm fp32 1
39 gpu relu fp32 1
40 gpu group_conv fp32 1
41 gpu batchnorm fp32 1
42 gpu relu fp32 1
43 gpu conv fp32 1
44 gpu batchnorm fp32 1
45 gpu relu fp32 1
46 gpu group_conv fp32 1
47 gpu batchnorm fp32 1
48 gpu relu fp32 1
49 gpu conv fp32 1
50 gpu batchnorm fp32 1
51 gpu relu fp32 1
52 gpu group_conv fp32 1
53 gpu batchnorm fp32 1
54 gpu relu fp32 1
55 gpu conv fp32 1
56 gpu batchnorm fp32 1
57 gpu relu fp32 1
58 gpu group_conv fp32 1
59 gpu batchnorm fp32 1
60 gpu relu fp32 1
61 gpu conv fp32 1
62 gpu batchnorm fp32 1
63 gpu relu fp32 1
64 gpu group_conv fp32 1
65 gpu batchnorm fp32 1
66 gpu relu fp32 1
67 gpu conv fp32 1
68 gpu batchnorm fp32 1
69 gpu relu fp32 1
70 gpu group_conv fp32 1
71 gpu batchnorm fp32 1
72 gpu relu fp32 1
73 gpu conv fp32 1
74 gpu batchnorm fp32 1
75 gpu relu fp32 1
76 gpu group_conv fp32 1
77 gpu batchnorm fp32 1
78 gpu relu fp32 1
79 gpu conv fp32 1
80 gpu batchnorm fp32 1
81 gpu relu fp32 1
82 gpu pool_mean fp32 1
83 gpu mul fp32 1 add fp32 1
84 gpu softmax fp32 1
-----
+++++
conf2 1.5 0 84.8 0
1 gpu conv fp16 1
2 gpu batchnorm fp16 1
3 gpu relu fp16 1
4 gpu group_conv fp16 1
5 gpu batchnorm fp16 1
6 gpu relu fp16 1
7 gpu conv fp16 1
8 gpu batchnorm fp16 1
9 gpu relu fp16 1
10 gpu group_conv fp16 1
11 gpu batchnorm fp16 1
12 gpu relu fp16 1
13 gpu conv fp16 1
14 gpu batchnorm fp16 1
15 gpu relu fp16 1
16 gpu group_conv fp16 1
17 gpu batchnorm fp16 1
18 gpu relu fp16 1
19 gpu conv fp16 1
20 gpu batchnorm fp16 1
21 gpu relu fp16 1
22 gpu group_conv fp16 1
23 gpu batchnorm fp16 1
24 gpu relu fp16 1
25 gpu conv fp16 1
26 gpu batchnorm fp16 1
27 gpu relu fp16 1
28 gpu group_conv fp16 1
29 gpu batchnorm fp16 1
30 gpu relu fp16 1
31 gpu conv fp16 1
32 gpu batchnorm fp16 1
33 gpu relu fp16 1
34 gpu group_conv fp16 1
35 gpu batchnorm fp16 1
36 gpu relu fp16 1
37 gpu conv fp16 1
38 gpu batchnorm fp16 1
39 gpu relu fp16 1
40 gpu group_conv fp16 1
41 gpu batchnorm fp16 1
42 gpu relu fp16 1
43 gpu conv fp16 1
44 gpu batchnorm fp16 1
45 gpu relu fp16 1
46 gpu group_conv fp16 1
47 gpu batchnorm fp16 1
48 gpu relu fp16 1
49 gpu conv fp16 1
50 gpu batchnorm fp16 1
51 gpu relu fp16 1
52 gpu group_conv fp16 1
53 gpu batchnorm fp16 1
54 gpu relu fp16 1
55 gpu conv fp16 1
56 gpu batchnorm fp16 1
57 gpu relu fp16 1
58 gpu group_conv fp16 1
59 gpu batchnorm fp16 1
60 gpu relu fp16 1
61 gpu conv fp16 1
62 gpu batchnorm fp16 1
63 gpu relu fp16 1
64 gpu group_conv fp16 1
65 gpu batchnorm fp16 1
66 gpu relu fp16 1
67 gpu conv fp16 1
68 gpu batchnorm fp16 1
69 gpu relu fp16 1
70 gpu group_conv fp16 1
71 gpu batchnorm fp16 1
72 gpu relu fp16 1
73 gpu conv fp16 1
74 gpu batchnorm fp16 1
75 gpu relu fp16 1
76 gpu group_conv fp16 1
77 gpu batchnorm fp16 1
78 gpu relu fp16 1
79 gpu conv fp16 1
80 gpu batchnorm fp16 1
81 gpu relu fp16 1
82 gpu pool_mean fp16 1
83 gpu mul fp16 1 add fp16 1
84 gpu softmax fp32 1
-----
1 -0.5500815 0.60786617 -1.0248864 1.2929907 -0.36291853 0.2533059 0.0 0.753551840782
2 0.0 0.753551840782 -0.69884616 0.71849966 -0.2781147 0.45571187 0.0 1.01057458043
3 0.0 1.01057458043 -0.59568167 0.7714691 -0.8602873 0.19743633 -1.84771883726 1.87930787086
4 0.0 2.33981014252 -0.41976976 0.43748936 -0.7021962 0.3033103 0.0 1.04317724705
5 0.0 1.04317724705 -0.46757826 0.4635873 -0.20662616 0.1778044 -0.829483509064 0.786805033684
6 0.0 2.49733686686 -0.64404047 0.45383143 -0.819547 0.38550296 0.0 0.897360802293
7 0.0 0.897360802293 -0.41986948 0.33654243 -0.3563013 0.22371122 -0.957150224447 0.54919362247
8 0.0 2.37362146616 -0.4805263 0.50655717 -0.296758 0.7742441 0.0 3.01592136621
9 0.0 3.01592136621 -0.52083415 0.45517674 -0.20242067 0.8236838 -5.2759475708 5.79733039856
10 0.0 2.37362146616 -0.5338656 1.3395424 -0.20242067 0.8236838 -0.738995380998 2.33600783587
11 0.0 7.07933432579 -0.34429058 0.43629733 -1.0744808 0.056708273 0.0 1.58645607233
12 0.0 1.58645607233 -0.30342352 0.39493486 -0.44630566 0.6492069 -1.49672914267 1.29970229745
13 0.0 7.11914063454 -0.38351893 0.45775774 -1.4733055 -0.014426912 0.0 1.52876508832
14 0.0 1.52876508832 -0.25695276 0.45372736 -0.5259744 0.26591402 -1.59576894164 1.08074297309
15 0.0 6.94405080318 -0.55299705 0.5443531 -0.71790683 1.2730768 0.0 10.3651468277
16 0.0 10.3651468277 -0.4203967 0.48641303 -0.90653443 1.3546854 -22.372925148 17.2033731079
17 0.0 6.94405080318 -0.4365755 0.84913826 -0.90653443 1.3546851 -3.66810325861 4.87814051151
18 0.0 18.8401451111 -0.38657624 0.5228989 -1.2083547 0.76361173 0.0 19.1229192352
19 0.0 19.1229192352 -0.40857902 0.575035 -1.8731614 1.0960501 -31.3229312897 14.8234729958
20 0.0 23.7382488823 -0.33079496 0.5893278 -1.0234511 1.0016295 0.0 19.5892774963
21 0.0 19.5892774963 -0.27897888 0.38280907 -2.2086356 1.0066502 -34.4416886902 20.9890329933
22 0.0 10.8541981602 -1.5092047 1.0279838 -0.49379802 0.61032647 -40.9121678543 25.7082381058
2000
+++++
conf1 1 0 89.59 0
1 gpu conv fp32 1 add fp32 1 relu fp32 1
2 gpu conv fp32 1 add fp32 1 relu fp32 1
3 gpu conv fp32 1 add fp32 1
4 gpu add fp32 1
5 gpu relu fp32 1
6 gpu conv fp32 1 add fp32 1 relu fp32 1
7 gpu conv fp32 1 add fp32 1
8 gpu add fp32 1
9 gpu relu fp32 1
10 gpu conv fp32 1 add fp32 1 relu fp32 1
11 gpu conv fp32 1 add fp32 1
12 gpu add fp32 1
13 gpu relu fp32 1
14 gpu conv fp32 1 add fp32 1 relu fp32 1
15 gpu conv fp32 1 add fp32 1
16 gpu conv fp32 1 add fp32 1
17 gpu add fp32 1
18 gpu relu fp32 1
19 gpu conv fp32 1 add fp32 1 relu fp32 1
20 gpu conv fp32 1 add fp32 1
21 gpu add fp32 1
22 gpu relu fp32 1
23 gpu conv fp32 1 add fp32 1 relu fp32 1
24 gpu conv fp32 1 add fp32 1
25 gpu add fp32 1
26 gpu relu fp32 1
27 gpu conv fp32 1 add fp32 1 relu fp32 1
28 gpu conv fp32 1 add fp32 1
29 gpu conv fp32 1 add fp32 1
30 gpu add fp32 1
31 gpu relu fp32 1
32 gpu conv fp32 1 add fp32 1 relu fp32 1
33 gpu conv fp32 1 add fp32 1
34 gpu add fp32 1
35 gpu relu fp32 1
36 gpu conv fp32 1 add fp32 1 relu fp32 1
37 gpu conv fp32 1 add fp32 1
38 gpu add fp32 1
39 gpu relu fp32 1
40 gpu pool_mean fp32 1
41 gpu mul fp32 1 add fp32 1
42 gpu softmax fp32 1
-----
+++++
conf2 1.5 0 89.59 0
1 gpu conv fp16 1 add fp16 1 relu fp16 1
2 gpu conv fp16 1 add fp16 1 relu fp16 1
3 gpu conv fp16 1 add fp16 1
4 gpu add fp16 1
5 gpu relu fp16 1
6 gpu conv fp16 1 add fp16 1 relu fp16 1
7 gpu conv fp16 1 add fp16 1
8 gpu add fp16 1
9 gpu relu fp16 1
10 gpu conv fp16 1 add fp16 1 relu fp16 1
11 gpu conv fp16 1 add fp16 1
12 gpu add fp16 1
13 gpu relu fp16 1
14 gpu conv fp16 1 add fp16 1 relu fp16 1
15 gpu conv fp16 1 add fp16 1
16 gpu conv fp16 1 add fp16 1
17 gpu add fp16 1
18 gpu relu fp16 1
19 gpu conv fp16 1 add fp16 1 relu fp16 1
20 gpu conv fp16 1 add fp16 1
21 gpu add fp16 1
22 gpu relu fp16 1
23 gpu conv fp16 1 add fp16 1 relu fp16 1
24 gpu conv fp16 1 add fp16 1
25 gpu add fp16 1
26 gpu relu fp16 1
27 gpu conv fp16 1 add fp16 1 relu fp16 1
28 gpu conv fp16 1 add fp16 1
29 gpu conv fp16 1 add fp16 1
30 gpu add fp16 1
31 gpu relu fp16 1
32 gpu conv fp16 1 add fp16 1 relu fp16 1
33 gpu conv fp16 1 add fp16 1
34 gpu add fp16 1
35 gpu relu fp16 1
36 gpu conv fp16 1 add fp16 1 relu fp16 1
37 gpu conv fp16 1 add fp16 1
38 gpu add fp16 1
39 gpu relu fp16 1
40 gpu pool_mean fp16 1
41 gpu mul fp16 1 add fp16 1
42 gpu softmax fp32 1
-----
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0
20 0 0 0 0 0 0 0 0
21 0 0 0 0 0 0 0 0
22 0 0 0 0 0 0 0 0
23 0 0 0 0 0 0 0 0
24 0 0 0 0 0 0 0 0
25 0 0 0 0 0 0 0 0
26 0 0 0 0 0 0 0 0
27 0 0 0 0 0 0 0 0
28 0 0 0 0 0 0 0 0
29 0 0 0 0 0 0 0 0
30 0 0 0 0 0 0 0 0
31 0 0 0 0 0 0 0 0
32 0 0 0 0 0 0 0 0
33 0 0 0 0 0 0 0 0
34 0 0 0 0 0 0 0 0
35 0 0 0 0 0 0 0 0
36 0 0 0 0 0 0 0 0
37 0 0 0 0 0 0 0 0
38 0 0 0 0 0 0 0 0
39 0 0 0 0 0 0 0 0
40 0 0 0 0 0 0 0 0
41 0 0 0 0 0 0 0 0
42 0 0 0 0 0 0 0 0
43 0 0 0 0 0 0 0 0
44 0 0 0 0 0 0 0 0
45 0 0 0 0 0 0 0 0
46 0 0 0 0 0 0 0 0
47 0 0 0 0 0 0 0 0
48 0 0 0 0 0 0 0 0
49 0 0 0 0 0 0 0 0
50 0 0 0 0 0 0 0 0
51 0 0 0 0 0 0 0 0
52 0 0 0 0 0 0 0 0
53 0 0 0 0 0 0 0 0
54 0 0 0 0 0 0 0 0
55 0 0 0 0 0 0 0 0
56 0 0 0 0 0 0 0 0
57 0 0 0 0 0 0 0 0
7161.053769000008
+++++
conf1 1 1 75.7 0.0
1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
2 gpu batchnorm fp32 11
3 gpu conv fp32 11 add fp32 1
4 gpu batchnorm fp32 11
5 gpu relu fp32 11
6 gpu conv fp32 11 add fp32 1
7 gpu batchnorm fp32 11
8 gpu relu fp32 11
9 gpu conv fp32 11 add fp32 1
10 gpu batchnorm fp32 11
11 gpu conv fp32 11 add fp32 1
12 gpu batchnorm fp32 11
13 gpu add fp32 11
14 gpu relu fp32 11
15 gpu conv fp32 11 add fp32 1
16 gpu batchnorm fp32 11
17 gpu relu fp32 11
18 gpu conv fp32 11 add fp32 1
19 gpu batchnorm fp32 11
20 gpu relu fp32 11
21 gpu conv fp32 11 add fp32 1
22 gpu batchnorm fp32 11
23 gpu add fp32 11
24 gpu relu fp32 11
25 gpu conv fp32 11 add fp32 1
26 gpu batchnorm fp32 11
27 gpu relu fp32 11
28 gpu conv fp32 11 add fp32 1
29 gpu batchnorm fp32 11
30 gpu relu fp32 11
31 gpu conv fp32 11 add fp32 1
32 gpu batchnorm fp32 11
33 gpu add fp32 11
34 gpu relu fp32 11
35 gpu conv fp32 11 add fp32 1
36 gpu batchnorm fp32 11
37 gpu relu fp32 11
38 gpu conv fp32 11 add fp32 1
39 gpu batchnorm fp32 11
40 gpu relu fp32 11
41 gpu conv fp32 11 add fp32 1
42 gpu batchnorm fp32 11
43 gpu conv fp32 11 add fp32 1
44 gpu batchnorm fp32 11
45 gpu add fp32 11
46 gpu relu fp32 11
47 gpu conv fp32 11 add fp32 1
48 gpu batchnorm fp32 11
49 gpu relu fp32 11
50 gpu conv fp32 11 add fp32 1
51 gpu batchnorm fp32 11
52 gpu relu fp32 11
53 gpu conv fp32 11 add fp32 1
54 gpu batchnorm fp32 11
55 gpu add fp32 11
56 gpu relu fp32 11
57 gpu conv fp32 11 add fp32 1
58 gpu batchnorm fp32 11
59 gpu relu fp32 11
60 gpu conv fp32 11 add fp32 1
61 gpu batchnorm fp32 11
62 gpu relu fp32 11
63 gpu conv fp32 11 add fp32 1
64 gpu batchnorm fp32 11
65 gpu add fp32 11
66 gpu relu fp32 11
67 gpu conv fp32 11 add fp32 1
68 gpu batchnorm fp32 11
69 gpu relu fp32 11
70 gpu conv fp32 11 add fp32 1
71 gpu batchnorm fp32 11
72 gpu relu fp32 11
73 gpu conv fp32 11 add fp32 1
74 gpu batchnorm fp32 11
75 gpu add fp32 11
76 gpu relu fp32 11
77 gpu conv fp32 11 add fp32 1
78 gpu batchnorm fp32 11
79 gpu relu fp32 11
80 gpu conv fp32 11 add fp32 1
81 gpu batchnorm fp32 11
82 gpu relu fp32 11
83 gpu conv fp32 11 add fp32 1
84 gpu batchnorm fp32 11
85 gpu conv fp32 11 add fp32 1
86 gpu batchnorm fp32 11
87 gpu add fp32 11
88 gpu relu fp32 11
89 gpu conv fp32 11 add fp32 1
90 gpu batchnorm fp32 11
91 gpu relu fp32 11
92 gpu conv fp32 11 add fp32 1
93 gpu batchnorm fp32 11
94 gpu relu fp32 11
95 gpu conv fp32 11 add fp32 1
96 gpu batchnorm fp32 11
97 gpu add fp32 11
98 gpu relu fp32 11
99 gpu conv fp32 11 add fp32 1
100 gpu batchnorm fp32 11
101 gpu relu fp32 11
102 gpu conv fp32 11 add fp32 1
103 gpu batchnorm fp32 11
104 gpu relu fp32 11
105 gpu conv fp32 11 add fp32 1
106 gpu batchnorm fp32 11
107 gpu add fp32 11
108 gpu relu fp32 11
109 gpu conv fp32 11 add fp32 1
110 gpu batchnorm fp32 11
111 gpu relu fp32 11
112 gpu conv fp32 11 add fp32 1
113 gpu batchnorm fp32 11
114 gpu relu fp32 11
115 gpu conv fp32 11 add fp32 1
116 gpu batchnorm fp32 11
117 gpu add fp32 11
118 gpu relu fp32 11
119 gpu conv fp32 11 add fp32 1
120 gpu batchnorm fp32 11
121 gpu relu fp32 11
122 gpu conv fp32 11 add fp32 1
123 gpu batchnorm fp32 11
124 gpu relu fp32 11
125 gpu conv fp32 11 add fp32 1
126 gpu batchnorm fp32 11
127 gpu add fp32 11
128 gpu relu fp32 11
129 gpu conv fp32 11 add fp32 1
130 gpu batchnorm fp32 11
131 gpu relu fp32 11
132 gpu conv fp32 11 add fp32 1
133 gpu batchnorm fp32 11
134 gpu relu fp32 11
135 gpu conv fp32 11 add fp32 1
136 gpu batchnorm fp32 11
137 gpu add fp32 11
138 gpu relu fp32 11
139 gpu conv fp32 11 add fp32 1
140 gpu batchnorm fp32 11
141 gpu relu fp32 11
142 gpu conv fp32 11 add fp32 1
143 gpu batchnorm fp32 11
144 gpu relu fp32 11
145 gpu conv fp32 11 add fp32 1
146 gpu batchnorm fp32 11
147 gpu conv fp32 11 add fp32 1
148 gpu batchnorm fp32 11
149 gpu add fp32 11
150 gpu relu fp32 11
151 gpu conv fp32 11 add fp32 1
152 gpu batchnorm fp32 11
153 gpu relu fp32 11
154 gpu conv fp32 11 add fp32 1
155 gpu batchnorm fp32 11
156 gpu relu fp32 11
157 gpu conv fp32 11 add fp32 1
158 gpu batchnorm fp32 11
159 gpu add fp32 11
160 gpu relu fp32 11
161 gpu conv fp32 11 add fp32 1
162 gpu batchnorm fp32 11
163 gpu relu fp32 11
164 gpu conv fp32 11 add fp32 1
165 gpu batchnorm fp32 11
166 gpu relu fp32 11
167 gpu conv fp32 11 add fp32 1
168 gpu batchnorm fp32 11
169 gpu add fp32 11
170 gpu relu fp32 11
171 gpu pool_max fp32 11
172 gpu mul fp32 11 add fp32 1
173 gpu softmax fp32 1
-----
1 -1.8816367 2.0934217 -0.53275156 0.49437004 -0.6403629 0.2490165 0.0 1.35908746719
2 0.0 1.35908746719 -0.2688396 0.20639156 -0.7745511 0.82006615 0.0 2.52123117924
3 0.0 2.52123117924 -0.16776876 0.14878987 -0.35283303 0.5154362 0.0 1.20119857848
4 0.0 1.20119857848 -0.088948585 0.114222586 -0.30250227 0.36856708 0.0 1.03598809302
5 0.0 1.03598809302 -0.07739562 0.10973293 -0.15568458 0.17634983 0.0 0.300495595038
6 0.0 0.300495595038 -0.051649556 0.05435231 -0.07395447 0.07996062 0.0 0.11490475405
7 0.0 0.11490475405 -0.043513633 0.07577866 -0.06921874 0.02660573 0.0 0.16232508488
8 0.0 0.16232508488 -0.033842053 0.045218028 -0.022827804 0.023845317 0.0 0.124249965735
9 0.0 0.124249965735 -0.02211613 0.032084666 -0.02699063 0.03773564 0.0 0.174634486511
10 0.0 0.174634486511 -0.01979376 0.034854397 -0.036107242 0.07056531 0.0 0.575175762177
11 0.0 0.575175762177 -0.03452098 0.046055835 -0.051925894 0.07039055 0.0 0.771875114441
12 0.0 0.771875114441 -0.025946895 0.040090334 -0.06049362 0.12658806 0.0 1.17285169065
13 0.0 1.17285169065 -0.021766115 0.03315237 -0.20705001 0.117947325 0.0 2.00157693863
14 0.0 2.00157693863 -0.042597745 0.046707444 -0.21937433 0.2545502 0.0 2.00236111879
15 0.0 2.00236111879 -0.32550547 0.30829763 -1.1787822 1.2378151 -18.2514705467 24.1736344528
+++++
conf1 1 0 90.19 0
1 gpu conv fp32 1 add fp32 1 relu fp32 1
2 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
3 gpu conv fp32 1 add fp32 1 relu fp32 1
4 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
5 gpu conv fp32 1 add fp32 1 relu fp32 1
6 gpu conv fp32 1 add fp32 1 relu fp32 1
7 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
8 gpu conv fp32 1 add fp32 1 relu fp32 1
9 gpu conv fp32 1 add fp32 1 relu fp32 1
10 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
11 gpu conv fp32 1 add fp32 1 relu fp32 1
12 gpu conv fp32 1 add fp32 1 relu fp32 1
13 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
14 gpu mul fp32 1 add fp32 1 relu fp32 1
15 gpu mul fp32 1 add fp32 1
16 gpu softmax fp32 1
-----
+++++
conf2 1.5 0 90.19 0
1 gpu conv fp16 1 add fp16 1 relu fp16 1
2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 1 add fp16 1 relu fp16 1
4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
5 gpu conv fp16 1 add fp16 1 relu fp16 1
6 gpu conv fp16 1 add fp16 1 relu fp16 1
7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
8 gpu conv fp16 1 add fp16 1 relu fp16 1
9 gpu conv fp16 1 add fp16 1 relu fp16 1
10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
11 gpu conv fp16 1 add fp16 1 relu fp16 1
12 gpu conv fp16 1 add fp16 1 relu fp16 1
13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
14 gpu mul fp16 1 add fp16 1 relu fp16 1
15 gpu mul fp16 1 add fp16 1
16 gpu softmax fp32 1
-----
1 -1.7829767 1.9456929 -0.7450515 0.71249133 -1.5885142 0.275554 0.0 8.190712
2 0.0 8.190712 -0.30790088 0.43504623 -1.4242363 1.2602744 0.0 19.023172
3 0.0 19.023172 -0.29189092 0.26958522 -1.0527138 0.9075671 0.0 14.428051
4 0.0 14.428051 -0.15521508 0.1829038 -0.845419 1.9358484 0.0 23.065294
5 0.0 23.065294 -0.13149762 0.14811686 -0.7162557 1.0370971 0.0 15.165984
6 0.0 15.165984 -0.06236292 0.08321518 -0.9067523 0.9922458 0.0 13.664733
7 0.0 13.664733 -0.06471479 0.1024472 -0.15943134 0.7988499 0.0 19.025272
8 0.0 19.025272 -0.06320205 0.08291938 -0.32540628 0.5203079 0.0 6.727217
9 0.0 6.727217 -0.037707984 0.051601283 -0.25622904 0.11251946 0.0 3.2003012
10 0.0 3.2003012 -0.056007143 0.09549151 -0.11591503 0.06267536 0.0 4.321189
11 0.0 4.321189 -0.060094673 0.10868926 -0.105962686 0.09584572 0.0 2.936297
12 0.0 2.936297 -0.034618977 0.05792674 -0.4237576 0.11035452 0.0 4.87262
13 0.0 4.87262 -0.035480656 0.058295887 -0.21477045 0.14263579 0.0 10.32133
14 0.0 10.32133 -0.08929961 0.11301676 -0.20798548 0.47405547 0.0 13.91
15 0.0 13.91 -0.6627122 0.35539475 -1.0631907 0.9830786 -70.45701 87.34367
2000
+++++
conf1 1 0 90.19 0
1 gpu conv fp32 1 add fp32 1 relu fp32 1
2 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
3 gpu conv fp32 1 add fp32 1 relu fp32 1
4 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
5 gpu conv fp32 1 add fp32 1 relu fp32 1
6 gpu conv fp32 1 add fp32 1 relu fp32 1
7 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
8 gpu conv fp32 1 add fp32 1 relu fp32 1
9 gpu conv fp32 1 add fp32 1 relu fp32 1
10 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
11 gpu conv fp32 1 add fp32 1 relu fp32 1
12 gpu conv fp32 1 add fp32 1 relu fp32 1
13 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1
14 gpu mul fp32 1 add fp32 1 relu fp32 1
15 gpu mul fp32 1 add fp32 1
16 gpu softmax fp32 1
-----
+++++
conf2 1.5 0 90.19 0
1 gpu conv fp16 1 add fp16 1 relu fp16 1
2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
3 gpu conv fp16 1 add fp16 1 relu fp16 1
4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
5 gpu conv fp16 1 add fp16 1 relu fp16 1
6 gpu conv fp16 1 add fp16 1 relu fp16 1
7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
8 gpu conv fp16 1 add fp16 1 relu fp16 1
9 gpu conv fp16 1 add fp16 1 relu fp16 1
10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
11 gpu conv fp16 1 add fp16 1 relu fp16 1
12 gpu conv fp16 1 add fp16 1 relu fp16 1
13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1
14 gpu mul fp16 1 add fp16 1 relu fp16 1
15 gpu mul fp16 1 add fp16 1
16 gpu softmax fp32 1
-----
-123.68 151.061 -0.5682651399970055 0.5677501424551024 -0.015828926 2.064037 0.0 455.8094849853551
0.0 455.8094849853551 -0.13156980648636818 0.2164201746285022 -1.0271513 0.9052184 0.0 2768.0925808105603
0.0 2768.0925808105603 -0.18644111251831055 0.202149114727974 -0.17922063 0.36547425 0.0 2843.0992189941426
0.0 2843.0992189941426 -0.10804861642420292 0.12427636455744764 -0.59533477 0.63375777 0.0 6242.117490722692
0.0 6242.117490722692 -0.08040237371623515 0.09835810117424044 -0.20097896 0.34949613 0.0 5991.169433105955
0.0 5991.169433105955 -0.05306418750435114 0.06628044287860436 -0.18124875 0.274845 0.0 5665.713403320435
0.0 5665.713403320435 -0.05084674355760217 0.07320860563218634 -0.14288792 0.59477174 0.0 7507.031136718913
0.0 7507.031136718913 -0.04523278899490833 0.053042236261070186 -0.14548235 0.3148451 0.0 4485.656244140948
0.0 4485.656244140948 -0.02917514201253653 0.03586270406842279 -0.08428453 0.18237582 0.0 2199.244487060563
0.0 2199.244487060563 -0.029496615380048753 0.04047201693058028 -0.19835947 0.33766547 0.0 1430.6242337648137
0.0 1430.6242337648137 -0.031951379626989365 0.04218719156458998 -0.3508028 0.6397485 0.0 740.3813181152382
0.0 740.3813181152382 -0.028522676015272738 0.03794213477522136 -0.9171057 0.7597668 0.0 379.07848284912643
0.0 379.07848284912643 -0.02821479567326605 0.03854479804635069 -0.50036746 9.431553 0.0 200.11104328918475
0.0 200.11104328918475 -0.007091613108757884 0.008147951829247227 -0.78005254 0.8555075 0.0 58.32595815658672
0.0 58.32595815658672 -0.012781758182682096 0.01437051862943929 -0.012339931 1.2154555 0.0 17.291313619614062
0.0 17.291313619614062 -0.02119149128906429 0.02715564412623694 -0.773357 0.6615543 -11.322624863624572 24.837094623566433
19194.623482
+++++
conf1 1 1 72.84 0.0
1 gpu conv fp32 11 add fp32 1 relu fp32 1
2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
3 gpu conv fp32 11 add fp32 1 relu fp32 1
4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
5 gpu conv fp32 11 add fp32 1 relu fp32 1
6 gpu conv fp32 11 add fp32 1 relu fp32 1
7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
8 gpu conv fp32 11 add fp32 1 relu fp32 1
9 gpu conv fp32 11 add fp32 1 relu fp32 1
10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
11 gpu conv fp32 11 add fp32 1 relu fp32 1
12 gpu conv fp32 11 add fp32 1 relu fp32 1
13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
14 gpu mul fp32 11 add fp32 1 relu fp32 1
15 gpu mul fp32 11 add fp32 1 relu fp32 1
16 gpu mul fp32 11 add fp32 1
17 gpu softmax fp32 1
-----
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment