diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet/alexnet_valid_soc.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet/alexnet_valid_soc.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1b7aeb981c745717c52c841f99672cfbd532f7cb
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet/alexnet_valid_soc.txt
@@ -0,0 +1,231 @@
+2725.121326
++++++
+conf1 1 1 78.78 0.0
+1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
+2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
+3 gpu conv fp32 11 add fp32 1 tanh fp32 1
+4 gpu conv fp32 11 add fp32 1 tanh fp32 1
+5 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
+6 gpu mul fp32 11 add fp32 1
+7 gpu softmax fp32 1
+-----
++++++
+conf2 2.1233638648528457 1.6150951710244676 78.3544 0.42560000000000286
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf3 2.051295134864554 1.6122580072322763 78.3278 0.4522000000000048
+1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf4 2.188609573694276 1.688911612634961 78.30120000000001 0.47879999999999256
+1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf5 2.0570505767108007 1.6000014977491621 78.2214 0.5585999999999984
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 265 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf6 2.009166522889861 1.5755494376470724 78.1948 0.5852000000000004
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf7 2.0188668300066377 1.5976556515195433 78.06179999999999 0.7182000000000102
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 266 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf8 2.1797184471932716 1.6767378001241562 78.06179999999999 0.7182000000000102
+1 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf9 2.064914192886025 1.6203964986881603 78.06179999999999 0.7182000000000102
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf10 2.2070171560926672 1.7194657877315815 78.0352 0.7447999999999979
+1 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 265 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf11 2.0161469236407057 1.5964768988685245 78.0086 0.7713999999999999
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf12 2.157846755426679 1.6765250202752133 78.0086 0.7713999999999999
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf13 2.0319664118931096 1.6183541826275754 77.98200000000001 0.7979999999999876
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf14 2.354997704376988 1.7779732164691666 77.98200000000001 0.7979999999999876
+1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv fp16 12 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf15 2.3463673263694 1.8510470086526165 77.98200000000001 0.7979999999999876
+1 gpu conv samp_fp16 264 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf16 2.284714727579521 1.7855758235498087 77.7692 1.0108000000000033
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+5 gpu conv samp_fp16 269 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf17 2.3463673263694 1.8510470086526165 77.68939999999999 1.0906000000000091
+1 gpu conv samp_fp16 264 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf18 2.427840309027486 1.9007943438562696 77.68939999999999 1.0906000000000091
+1 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 263 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf19 2.4671009475732766 1.9246545843862224 77.47659999999999 1.3034000000000106
+1 gpu conv samp_fp16 264 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf20 2.5567127702266332 1.9773019485322874 77.2638 1.5161999999999978
+1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf21 2.557898283218207 1.9895818051250724 77.2372 1.5427999999999997
+1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+5 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf22 2.557898283218207 1.9895818051250724 77.21060000000001 1.5693999999999875
+1 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+5 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
++++++
+conf23 2.6457265307759883 2.029290916760937 77.1574 1.6226000000000056
+1 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+6 gpu mul fp16 12 add fp16 12
+7 gpu softmax fp16 12
+-----
diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet2/alexnet2_valid_soc.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet2/alexnet2_valid_soc.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a888b5ee5a50d140f60d6579a3f6bdb6aa5ddfbd
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/alexnet2/alexnet2_valid_soc.txt
@@ -0,0 +1,188 @@
+1129.3450630000002
++++++
+conf1 1 1 84.76 0.0
+1 gpu conv fp32 11 add fp32 1 tanh fp32 1
+2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
+3 gpu conv fp32 11 add fp32 1 tanh fp32 1
+4 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
+5 gpu conv fp32 11 add fp32 1 tanh fp32 1
+6 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1
+7 gpu mul fp32 11 add fp32 1
+8 gpu softmax fp32 1
+-----
++++++
+conf2 2.2258170210610477 1.3875307929727092 84.74 0.020000000000010232
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 151 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf3 2.3673182996864846 1.4566777038051897 84.49999999999999 0.2600000000000193
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf4 2.24614762418964 1.41800542976017 84.25999999999999 0.5000000000000142
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf5 2.304084258604824 1.4284953488024343 84.228 0.5320000000000107
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 151 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 267 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf6 2.3377766277342653 1.4440340860007412 84.228 0.5320000000000107
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+6 gpu conv fp16 12 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf7 2.24614762418964 1.41800542976017 84.17479999999999 0.5852000000000146
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf8 2.3673182996864846 1.4566777038051897 84.095 0.6650000000000063
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf9 2.2463714607055545 1.417884448648111 83.8024 0.9575999999999993
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 266 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf10 2.389025803395913 1.4732901147183992 83.77579999999999 0.9842000000000155
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf11 2.288831273542033 1.435952475412438 83.61619999999999 1.143800000000013
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf12 2.288831273542033 1.435952475412438 83.58959999999999 1.170400000000015
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 158 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf13 2.389025803395913 1.4732901147183992 83.58959999999999 1.170400000000015
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 268 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf14 2.3892790238475423 1.4731595166090572 83.4566 1.3034000000000106
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 266 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf15 2.390450803781405 1.4707319718833016 83.3768 1.3832000000000022
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 266 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 157 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf16 2.4373708430335537 1.49267343110314 83.3768 1.3832000000000022
+1 gpu conv fp16 11 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
++++++
+conf17 2.4373708430335537 1.49267343110314 83.2704 1.48960000000001
+1 gpu conv fp16 12 add fp16 12 tanh fp16 12
+2 gpu conv perf_fp16 153 add fp16 12 tanh fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 tanh fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 261 add fp16 12 tanh fp16 12
+6 gpu conv perf_fp16 160 add fp16 12 tanh fp16 12 pool_max fp16 12
+7 gpu mul fp16 12 add fp16 12
+8 gpu softmax fp16 12
+-----
diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/resnet18/resnet18_valid_soc.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/resnet18/resnet18_valid_soc.txt
new file mode 100644
index 0000000000000000000000000000000000000000..942789c1c4defd1139e75209ffbcb073a2b39b30
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/resnet18/resnet18_valid_soc.txt
@@ -0,0 +1,1576 @@
+2593.3013975999997
++++++
+conf1 1 1 89.42 0.0
+1 gpu conv fp32 11 add fp32 1 relu fp32 1
+2 gpu conv fp32 11 add fp32 1 relu fp32 1
+3 gpu conv fp32 11 add fp32 1
+4 gpu add fp32 11
+5 gpu relu fp32 11
+6 gpu conv fp32 11 add fp32 1 relu fp32 1
+7 gpu conv fp32 11 add fp32 1
+8 gpu add fp32 11
+9 gpu relu fp32 11
+10 gpu conv fp32 11 add fp32 1 relu fp32 1
+11 gpu conv fp32 11 add fp32 1
+12 gpu add fp32 11
+13 gpu relu fp32 11
+14 gpu conv fp32 11 add fp32 1 relu fp32 1
+15 gpu conv fp32 11 add fp32 1
+16 gpu conv fp32 11 add fp32 1
+17 gpu add fp32 11
+18 gpu relu fp32 11
+19 gpu conv fp32 11 add fp32 1 relu fp32 1
+20 gpu conv fp32 11 add fp32 1
+21 gpu add fp32 11
+22 gpu relu fp32 11
+23 gpu conv fp32 11 add fp32 1 relu fp32 1
+24 gpu conv fp32 11 add fp32 1
+25 gpu add fp32 11
+26 gpu relu fp32 11
+27 gpu conv fp32 11 add fp32 1 relu fp32 1
+28 gpu conv fp32 11 add fp32 1
+29 gpu conv fp32 11 add fp32 1
+30 gpu add fp32 11
+31 gpu relu fp32 11
+32 gpu conv fp32 11 add fp32 1 relu fp32 1
+33 gpu conv fp32 11 add fp32 1
+34 gpu add fp32 11
+35 gpu relu fp32 11
+36 gpu conv fp32 11 add fp32 1 relu fp32 1
+37 gpu conv fp32 11 add fp32 1
+38 gpu add fp32 11
+39 gpu relu fp32 11
+40 gpu pool_mean fp32 11
+41 gpu mul fp32 11 add fp32 1
+42 gpu softmax fp32 1
+-----
++++++
+conf2 1.8227860146926984 1.3592380545823108 88.28 1.1400000000000006
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 162 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 166 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv fp16 12 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf3 1.772745264351603 1.3340968704252147 88.2 1.2199999999999989
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 166 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf4 1.831301934833889 1.3636544094268177 88.2 1.2199999999999989
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf5 1.7541385118416233 1.323200331238725 88.12 1.2999999999999972
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 166 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf6 1.750881760437994 1.3214899710791683 88.12 1.2999999999999972
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 166 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf7 1.9207420870636576 1.4105446231099241 88.1 1.3200000000000074
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 159 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 160 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv fp16 11 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv fp16 12 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf8 1.897654446584276 1.3943617562849198 88.1 1.3200000000000074
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 263 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 153 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv fp16 12 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv perf_fp16 154 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv samp_fp16 262 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf9 1.9276001243246026 1.4155139358802007 88.08 1.3400000000000034
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 168 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 159 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 160 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv fp16 11 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 155 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf10 1.8877611861107602 1.3945090937373315 88.03999999999999 1.3800000000000097
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 154 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 166 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf11 1.884015904997108 1.386748889441216 87.96000000000001 1.4599999999999937
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 263 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 153 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv fp16 12 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv perf_fp16 154 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv samp_fp16 262 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf12 1.815742308450095 1.3541765419789824 87.83999999999999 1.5800000000000125
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 262 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv fp16 11 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf13 1.928011277898605 1.414528053850526 87.83999999999999 1.5800000000000125
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 159 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 160 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv fp16 11 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 155 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf14 1.8702574116471649 1.3838796270391824 87.8 1.6200000000000045
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 269 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf15 1.9390257777318618 1.4193909923193697 87.8 1.6200000000000045
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 159 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv perf_fp16 154 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 155 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf16 1.8505712546542585 1.372601565984325 87.76 1.6599999999999966
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 153 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf17 1.931335957581042 1.4149043748735137 87.74 1.6800000000000068
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 157 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf18 1.8390656100510818 1.3668229301466752 87.68 1.7399999999999949
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf19 1.9360126662655235 1.416245073512222 87.64 1.7800000000000011
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 155 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 264 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf20 1.826739398491775 1.3609522133620269 87.62 1.7999999999999972
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 153 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv samp_fp16 262 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 165 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf21 1.8243322012642802 1.3542277148411042 87.62 1.7999999999999972
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 263 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv samp_fp16 266 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf22 1.8245510435946863 1.3601414031759373 87.58 1.8400000000000034
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv samp_fp16 269 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 153 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf23 1.9832010015590205 1.4407797001367388 87.56 1.8599999999999994
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 159 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 261 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv fp16 11 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 155 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf24 1.831958859203629 1.3643626254848584 87.5 1.9200000000000017
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 151 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf25 1.827209961997738 1.3576190436536635 87.5 1.9200000000000017
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 263 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 159 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv fp16 12 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv perf_fp16 154 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv samp_fp16 262 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf26 1.9532893879837718 1.4253186875342474 87.5 1.9200000000000017
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 153 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 168 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 262 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv fp16 11 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf27 1.8598315807624513 1.376813374656673 87.48 1.9399999999999977
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf28 1.8545931630272876 1.3744725755811524 87.48 1.9399999999999977
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 267 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 152 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf29 1.9088935397779812 1.4033062374488858 87.44 1.980000000000004
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 163 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 267 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf30 1.8306014158563824 1.3613821654101905 87.44 1.980000000000004
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 164 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 265 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 168 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 262 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf31 1.9755297077095708 1.4378811225069261 87.44 1.980000000000004
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 159 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 159 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv perf_fp16 154 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 155 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf32 1.827200177575606 1.356175543415313 87.38 2.0400000000000063
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 156 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 264 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv perf_fp16 167 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf33 1.8517276001191023 1.3729319418960464 87.38 2.0400000000000063
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+7 gpu conv fp16 12 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv samp_fp16 269 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 161 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 157 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 158 add fp16 12 relu fp16 12
+24 gpu conv perf_fp16 160 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv samp_fp16 268 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv fp16 12 add fp16 12 relu fp16 12
+37 gpu conv samp_fp16 269 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf34 1.8938192956663813 1.3919348631813433 87.38 2.0400000000000063
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 263 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 153 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 165 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv fp16 11 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv samp_fp16 262 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
++++++
+conf35 1.8989539669005067 1.3938360809175603 87.36 2.0600000000000023
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv fp16 12 add fp16 12 relu fp16 12
+3 gpu conv fp16 12 add fp16 12
+4 gpu add fp16 12
+5 gpu relu fp16 12
+6 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 263 add fp16 12
+8 gpu add fp16 12
+9 gpu relu fp16 12
+10 gpu conv perf_fp16 154 add fp16 12 relu fp16 12
+11 gpu conv perf_fp16 154 add fp16 12
+12 gpu add fp16 12
+13 gpu relu fp16 12
+14 gpu conv fp16 12 add fp16 12 relu fp16 12
+15 gpu conv fp16 12 add fp16 12
+16 gpu conv fp16 11 add fp16 12
+17 gpu add fp16 12
+18 gpu relu fp16 12
+19 gpu conv perf_fp16 153 add fp16 12 relu fp16 12
+20 gpu conv perf_fp16 151 add fp16 12
+21 gpu add fp16 12
+22 gpu relu fp16 12
+23 gpu conv perf_fp16 157 add fp16 12 relu fp16 12
+24 gpu conv samp_fp16 268 add fp16 12
+25 gpu add fp16 12
+26 gpu relu fp16 12
+27 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+28 gpu conv fp16 12 add fp16 12
+29 gpu conv perf_fp16 154 add fp16 12
+30 gpu add fp16 12
+31 gpu relu fp16 12
+32 gpu conv fp16 12 add fp16 12 relu fp16 12
+33 gpu conv fp16 12 add fp16 12
+34 gpu add fp16 12
+35 gpu relu fp16 12
+36 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+37 gpu conv samp_fp16 262 add fp16 12
+38 gpu add fp16 12
+39 gpu relu fp16 12
+40 gpu pool_mean fp16 12
+41 gpu mul fp16 12 add fp16 12
+42 gpu softmax fp16 12
+-----
diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar10/vgg16_cifar10_valid_soc.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar10/vgg16_cifar10_valid_soc.txt
new file mode 100644
index 0000000000000000000000000000000000000000..789f4e21cf4a778535d1df0f9f7be22c1415d672
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar10/vgg16_cifar10_valid_soc.txt
@@ -0,0 +1,1027 @@
+3994.0731450000017
++++++
+conf1 1 1 89.22 0.0
+1 gpu conv fp32 11 add fp32 1 relu fp32 1
+2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+3 gpu conv fp32 11 add fp32 1 relu fp32 1
+4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+5 gpu conv fp32 11 add fp32 1 relu fp32 1
+6 gpu conv fp32 11 add fp32 1 relu fp32 1
+7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+8 gpu conv fp32 11 add fp32 1 relu fp32 1
+9 gpu conv fp32 11 add fp32 1 relu fp32 1
+10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+11 gpu conv fp32 11 add fp32 1 relu fp32 1
+12 gpu conv fp32 11 add fp32 1 relu fp32 1
+13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+14 gpu mul fp32 11 add fp32 1 relu fp32 1
+15 gpu mul fp32 11 add fp32 1
+16 gpu softmax fp32 1
+-----
++++++
+conf2 2.3049904288987464 1.6887800235455193 89.14 0.0799999999999983
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+13 gpu conv fp16 11 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf3 2.357615734902983 1.7226289827534114 89.14 0.0799999999999983
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf4 2.3831343547359976 1.7374446557158316 88.84 0.37999999999999545
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 162 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf5 2.3696393667573616 1.7284732038695636 88.8 0.4200000000000017
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 162 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 265 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf6 2.4444787116056292 1.7833916898567774 88.58 0.6400000000000006
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf7 2.40209759505425 1.7661661942711917 88.58 0.6400000000000006
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf8 2.528892013058046 1.8332619869789675 88.08 1.1400000000000006
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf9 2.5283008295291105 1.8324605771289624 88.06 1.1599999999999966
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf10 2.5562616043247313 1.847605117430125 88.03999999999999 1.1800000000000068
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf11 2.5337351216813757 1.836759334487813 88.03999999999999 1.1800000000000068
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf12 2.556171297969468 1.8482604143790797 88.03999999999999 1.1800000000000068
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf13 2.5562385363337343 1.8481145682015834 88.03999999999999 1.1800000000000068
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf14 2.556612910921585 1.8486422226408725 88.03999999999999 1.1800000000000068
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf15 2.5419253262471346 1.8395765136023223 88.02 1.2000000000000028
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 263 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf16 2.4937721600323406 1.8116328904640306 88.0 1.2199999999999989
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv perf_fp16 162 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf17 2.5545877208248187 1.8465313171321942 88.0 1.2199999999999989
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 266 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf18 2.528537397828869 1.8330988121074523 88.0 1.2199999999999989
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf19 2.531670576114998 1.8357132731685366 88.0 1.2199999999999989
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf20 2.5294693760803577 1.8335105878862015 87.98 1.2399999999999949
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 268 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf21 2.5582293136941723 1.8476583031165972 87.98 1.2399999999999949
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 156 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf22 2.556327374925176 1.8481587827658859 87.98 1.2399999999999949
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf23 2.557806470696261 1.8492020211230846 87.98 1.2399999999999949
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf24 2.5545697480449 1.8464092920718178 87.96000000000001 1.259999999999991
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 267 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf25 2.528206406642683 1.832658178797549 87.96000000000001 1.259999999999991
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf26 2.556533707152568 1.8484262997816934 87.96000000000001 1.259999999999991
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf27 2.5393059900815325 1.837123626585959 87.94 1.2800000000000011
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 265 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 269 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf28 2.5486219361262235 1.845481069177171 87.94 1.2800000000000011
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf29 2.5485321687357825 1.8461348600374907 87.94 1.2800000000000011
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf30 2.5657339222733015 1.8517901869245543 87.92 1.2999999999999972
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 263 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf31 2.581139532058275 1.860666047394923 87.92 1.2999999999999972
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf32 2.5098654459068945 1.8297655130336108 87.92 1.2999999999999972
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf33 2.528587182046725 1.8312521826965082 87.9 1.3199999999999932
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 156 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 266 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf34 2.517311952294846 1.8204468250382393 87.9 1.3199999999999932
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv fp16 11 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf35 2.517311952294846 1.8204468250382393 87.9 1.3199999999999932
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv fp16 11 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf36 2.517311952294846 1.8204468250382393 87.9 1.3199999999999932
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv fp16 11 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf37 2.517311952294846 1.8204468250382393 87.9 1.3199999999999932
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv fp16 11 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf38 2.5346932948358267 1.8376287813464989 87.9 1.3199999999999932
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 265 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf39 2.4914548049246 1.8095620501702707 87.86 1.3599999999999994
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv perf_fp16 162 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 268 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf40 2.5809312104420865 1.8607657818447936 87.86 1.3599999999999994
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf41 2.5120056276901925 1.824277681148882 87.83999999999999 1.3800000000000097
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 268 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 266 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf42 2.556168516896762 1.849243225747987 87.83999999999999 1.3800000000000097
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf43 2.512713457130698 1.8053797549107755 87.82 1.4000000000000057
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf44 2.509447559327321 1.8294109824358684 87.82 1.4000000000000057
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf45 2.532043246184595 1.8347717424454622 87.74 1.480000000000004
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv samp_fp16 265 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf46 2.4911011329750212 1.795311376068545 87.68 1.539999999999992
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 155 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 153 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv samp_fp16 269 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf47 2.549746515565958 1.8283676275816687 87.66000000000001 1.559999999999988
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv fp16 12 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+9 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf48 2.51145215830771 1.8254971754777813 87.64 1.5799999999999983
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 266 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf49 2.513356522647888 1.826263067419964 87.58 1.6400000000000006
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 269 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf50 2.513356522647888 1.826263067419964 87.53999999999999 1.6800000000000068
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 269 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf51 2.4881677905203494 1.8127135485543127 87.4 1.8199999999999932
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 269 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf52 2.51145215830771 1.8254971754777813 87.36 1.8599999999999994
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 266 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf53 2.4757784613808234 1.7991027289904775 87.26 1.9599999999999937
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 269 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv fp16 11 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf54 2.5913526715019284 1.8695479088125426 87.24 1.980000000000004
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv perf_fp16 163 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv perf_fp16 151 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar100/vgg16_cifar100_valid_soc.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar100/vgg16_cifar100_valid_soc.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ef6509b99bee287bf0e3dfbaa035d51f9e3cb0ea
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/runtime_experiments/vgg16_cifar100/vgg16_cifar100_valid_soc.txt
@@ -0,0 +1,210 @@
+3845.438677999999
++++++
+conf1 1 1 68.42 0.0
+1 gpu conv fp32 11 add fp32 1 relu fp32 1
+2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+3 gpu conv fp32 11 add fp32 1 relu fp32 1
+4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+5 gpu conv fp32 11 add fp32 1 relu fp32 1
+6 gpu conv fp32 11 add fp32 1 relu fp32 1
+7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+8 gpu conv fp32 11 add fp32 1 relu fp32 1
+9 gpu conv fp32 11 add fp32 1 relu fp32 1
+10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+11 gpu conv fp32 11 add fp32 1 relu fp32 1
+12 gpu conv fp32 11 add fp32 1 relu fp32 1
+13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1
+14 gpu mul fp32 11 add fp32 1 relu fp32 1
+15 gpu mul fp32 11 add fp32 1
+16 gpu softmax fp32 1
+-----
++++++
+conf2 2.4361074671227554 1.7555866253938424 67.22 1.2000000000000028
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv fp16 11 add fp16 12 relu fp16 12
+12 gpu conv fp16 11 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 264 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf3 2.602684148359414 1.8286503060252126 67.10000000000001 1.3199999999999932
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 156 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv fp16 11 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf4 2.661880095451371 1.886369953641946 67.06 1.3599999999999994
+1 gpu conv fp16 12 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 156 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf5 2.5990656605003855 1.8588553950032938 66.84 1.5799999999999983
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 163 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf6 2.5884968081531485 1.8594972115815722 66.8 1.6200000000000045
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 165 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf7 2.4323231936537972 1.8028228076034056 66.8 1.6200000000000045
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf8 2.575472326184571 1.8375078883357683 66.72 1.7000000000000028
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv perf_fp16 161 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+12 gpu conv fp16 11 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf9 2.4912510106198957 1.848807665058795 66.58 1.8400000000000034
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 266 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf10 2.4323231936537972 1.8028228076034056 66.53999999999999 1.8800000000000097
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 152 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----
++++++
+conf11 2.4027045398540046 1.7853827712848849 66.47999999999999 1.940000000000012
+1 gpu conv fp16 11 add fp16 12 relu fp16 12
+2 gpu conv samp_fp16 269 add fp16 12 relu fp16 12 pool_max fp16 12
+3 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+4 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+5 gpu conv fp16 12 add fp16 12 relu fp16 12
+6 gpu conv samp_fp16 261 add fp16 12 relu fp16 12
+7 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+8 gpu conv perf_fp16 155 add fp16 12 relu fp16 12
+9 gpu conv samp_fp16 262 add fp16 12 relu fp16 12
+10 gpu conv samp_fp16 262 add fp16 12 relu fp16 12 pool_max fp16 12
+11 gpu conv perf_fp16 160 add fp16 12 relu fp16 12
+12 gpu conv perf_fp16 151 add fp16 12 relu fp16 12
+13 gpu conv samp_fp16 261 add fp16 12 relu fp16 12 pool_max fp16 12
+14 gpu mul fp16 12 add fp16 12 relu fp16 12
+15 gpu mul fp16 12 add fp16 12
+16 gpu softmax fp16 12
+-----